# Remove every visible (non-dot-prefixed) object from the current environment so
# the script does not pick up leftovers from an earlier session, then trigger a
# garbage collection to release the memory those objects occupied.
rm(list=ls())
gc()

require(readr)
require(dplyr)
require(plyr)
require(tidyverse)
require(cobalt)
require(MatchIt)
require(ggplot2)
require(lubridate)

# This script aggregates the voterfile data by birthdate cohort, creating sums of relevant variables by date of birth.

files = list.files('data/voterfile')

# Columns that are summed within each birthdate cohort.
outcome_cols = c("vote.gen2016", "vote.gen2014", "vote.gen2012", "vote.gen2010",
                 "vote.gen2008", "vote.gen2006", "vote.gen2004", "vote.gen2002",
                 "vote.gen2000", "democrat", "republican")

# Race indicator columns of `data`; subgroup tables are built and joined in this order.
race_cols = c("white", "black", "hispanic")

# Sum `cols` of `data` by birth date.
#   data   : voter-level data frame with a `birth.date` column.
#   cols   : names of the columns to sum.
#   keep   : optional logical row filter; NULL means "use every row".
#   suffix : optional string appended to every summed column name (not to `dob`).
# Returns a data frame with one row per birth date: `dob` plus the summed columns.
sum_by_dob = function(data, cols, keep = NULL, suffix = NULL) {
  if (is.null(keep)) {
    out = aggregate(data[, cols], by = list(dob = data$birth.date), FUN = sum)
  } else if (!any(keep, na.rm = TRUE)) {
    # No row belongs to this subgroup. aggregate() cannot handle that, so return
    # an empty table with the expected columns; after the joins the subgroup's
    # columns are all NA and are later replaced by 0.
    out = data.frame(dob = data$birth.date[0], stringsAsFactors = FALSE)
    for (col in cols) out[[col]] = numeric(0)
  } else {
    out = aggregate(data[keep, cols], by = list(dob = data$birth.date[keep]), FUN = sum)
  }
  if (!is.null(suffix)) {
    colnames(out)[-1] = paste0(colnames(out)[-1], suffix)
  }
  out
}

for(file in files){
  # First two characters of the file name; used to name the output file
  # (presumably the state abbreviation -- confirm against the input naming).
  st = str_sub(file, end = 2)

  # The input file is expected to contain a data frame called `data`.
  loaded = load(paste0('data/voterfile/', file))
  if (!("data" %in% loaded)) {
    stop("File '", file, "' does not contain an object named 'data'")
  }

  # One row per birth date with the number of records (registrants) and
  # calendar descriptors of that date.
  dd = as.data.frame(table(data$birth.date))
  colnames(dd) = c("dob", "registered")
  dd$dob = ymd(dd$dob)
  dd$yday = yday(dd$dob)
  dd$year = year(dd$dob)
  dd$weekday = tolower(weekdays(as.Date(dd$dob)))
  dd$month = tolower(months(as.Date(dd$dob)))

  # Constant 1 per record so that summing it yields the subgroup's record count.
  data$registered = 1

  # Overall sums first (no `registered`: dd already carries the overall count) ...
  tomerge = sum_by_dob(data, outcome_cols)

  # ... then race-only, female-by-race and male-by-race sums, joined in that order
  # so the output column order is: *_white, *_black, *_hispanic, *_female_white, ...,
  # *_male_hispanic.
  for (sex in c("", "female", "male")) {
    for (race in race_cols) {
      keep = data[[race]] == 1
      if (sex == "female") keep = data$female == 1 & keep
      if (sex == "male")   keep = data$female == 0 & keep
      label = if (sex == "") race else paste0(sex, "_", race)

      subgroup = sum_by_dob(data, c(outcome_cols, "registered"), keep = keep,
                            suffix = paste0("_", label))
      tomerge = left_join(tomerge, subgroup, by = "dob")
    }
  }

  tomerge$dob = ymd(tomerge$dob)
  new_dd = left_join(dd, tomerge, by = "dob")

  # Expose the race-only registration counts under the plain race name; the
  # renamed columns end up as the last three columns of the output.
  for (race in race_cols) {
    new_dd[[race]] = new_dd[[paste0("registered_", race)]]
    new_dd[[paste0("registered_", race)]] = NULL
  }

  # Replace missing counts by 0 in every aggregated column. The columns are
  # selected by name (everything that did not come from dd) rather than by a
  # fixed position, so the first aggregated column (vote.gen2016) is no longer
  # skipped.
  count_cols = setdiff(colnames(new_dd), colnames(dd))
  for(col in count_cols){
    new_dd[[col]] = ifelse(is.na(new_dd[[col]]), 0, new_dd[[col]])
  }
  ##############################################################

  save(new_dd, file = paste0("data/counts/","counts", "_", st, ".Rdata"))

  print(st)
  # Drop the per-file objects before loading the next (potentially large) file.
  rm(new_dd, dd, tomerge, subgroup, keep, data)
  gc()
}





